This is a guided project that I completed using the data science learning platform Dataquest. In this project, I will look at data from New York City schools to understand whether parent, teacher or student perceptions of the following four factors affect average SAT scores: safety and respect, communication, engagement, and academic expectations.
The data, collected in 2011, are publicly accessible and can be accessed here.
To begin, I’ll import the tidyverse
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'tidyr' was built under R version 3.6.2
## -- Conflicts ------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Next, I’ll set my working directory and import the data
# Make the project folder the working directory so the relative file names used
# by the read_* calls resolve.
# NOTE(review): this path is hard-coded and machine-specific; anyone else running
# the notebook has to edit it.
setwd("C:/Users/Dennis/Desktop/nyc_schools_project")
# Read the comma-separated file combined.csv into `sat`.
sat <- read_csv("combined.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## DBN = col_character(),
## school_name = col_character(),
## boro = col_character()
## )
## See spec(...) for full column specifications.
# Read the tab-separated file masterfile11_gened_final.txt into `gened`.
gened <- read_tsv("masterfile11_gened_final.txt")
## Parsed with column specification:
## cols(
## .default = col_double(),
## dbn = col_character(),
## bn = col_character(),
## schoolname = col_character(),
## studentssurveyed = col_character(),
## schooltype = col_character(),
## p_q1 = col_logical(),
## p_q3d = col_logical(),
## p_q9 = col_logical(),
## p_q10 = col_logical(),
## p_q12aa = col_logical(),
## p_q12ab = col_logical(),
## p_q12ac = col_logical(),
## p_q12ad = col_logical(),
## p_q12ba = col_logical(),
## p_q12bb = col_logical(),
## p_q12bc = col_logical(),
## p_q12bd = col_logical(),
## t_q6m = col_logical(),
## t_q9 = col_logical(),
## t_q10a = col_logical()
## # ... with 18 more columns
## )
## See spec(...) for full column specifications.
# Read the tab-separated file masterfile11_d75_final.txt into `d75`.
d75 <- read_tsv("masterfile11_d75_final.txt")
## Parsed with column specification:
## cols(
## .default = col_double(),
## dbn = col_character(),
## bn = col_character(),
## schoolname = col_character(),
## studentssurveyed = col_character(),
## schooltype = col_character(),
## p_q5 = col_logical(),
## p_q9 = col_logical(),
## p_q13a = col_logical(),
## p_q13b = col_logical(),
## p_q13c = col_logical(),
## p_q13d = col_logical(),
## p_q14a = col_logical(),
## p_q14b = col_logical(),
## p_q14c = col_logical(),
## p_q14d = col_logical(),
## t_q11a = col_logical(),
## t_q11b = col_logical(),
## t_q14 = col_logical(),
## t_q15a = col_logical(),
## t_q15b = col_logical()
## # ... with 14 more columns
## )
## See spec(...) for full column specifications.
Now, let’s preview our data for sat
# Show the first few rows of `sat`.
head(sat)
Then, let’s take a look at our gened data
# Show the first few rows of `gened`.
head(gened)
Then, we’ll look at our d75 data
# Show the first few rows of `d75`.
head(d75)
Let’s trim our gened dataframe so that we’re only looking at necessary variables
# Keep only the identifying columns and the four aggregate survey scores from
# `gened`, and give the score columns descriptive names.
gened_trimmed <- gened %>%
  select(
    dbn, bn, schoolname, d75, studentssurveyed, highschool, schooltype,
    saf_tot_11, com_tot_11, eng_tot_11, aca_tot_11
  ) %>%
  rename(
    safety_respect_score = saf_tot_11,
    communication_score = com_tot_11,
    engagement_score = eng_tot_11,
    academic_expectations_score = aca_tot_11
  )
# Preview the trimmed table (not the untrimmed `gened`) to confirm the result.
head(gened_trimmed)
Let’s do the same for our d75 dataframe
# Keep only the identifying columns and the four aggregate survey scores from
# `d75`, and give the score columns descriptive names.
d75_trimmed <- d75 %>%
  select(
    dbn, bn, schoolname, d75, studentssurveyed, highschool, schooltype,
    saf_tot_11, com_tot_11, eng_tot_11, aca_tot_11
  ) %>%
  rename(
    safety_respect_score = saf_tot_11,
    communication_score = com_tot_11,
    engagement_score = eng_tot_11,
    academic_expectations_score = aca_tot_11
  )
# Preview the trimmed table (not the untrimmed `d75`) to confirm the result.
head(d75_trimmed)
Let’s combine our d75 dataframe with our gened dataframe. We’ll rename it survey
# Stack the trimmed d75 rows on top of the trimmed gened rows, then preview.
survey <- rbind(d75_trimmed, gened_trimmed)
head(survey)
Now, let’s join our survey dataframe with our sat dataframe. We’ll call it master.
# Join `sat` to `survey` (sat$DBN matched against survey$dbn, keeping only
# schools present in both), drop the survey's name/number columns, and add the
# mean of the four survey scores as `avg_score`.
master <- mutate(
  select(
    inner_join(sat, survey, by = c("DBN" = "dbn")),
    -schoolname, -bn
  ),
  avg_score = (
    safety_respect_score + communication_score +
      engagement_score + academic_expectations_score
  ) / 4
)
head(master)
Let’s create a function that lets us find the correlations between a variable and a list of other variables
#' Correlate one variable with a set of other variables.
#'
#' Computes Pearson correlations using only the rows of `dat` that are complete
#' for every requested column, and labels each coefficient by strength.
#'
#' @param x Name of the reference column (a single string).
#' @param y Character vector of column names to correlate with `x`.
#' @param dat Data frame or tibble containing the columns named in `x` and `y`.
#' @return A tibble with one row per distinct column in `c(x, y)` (including `x`
#'   itself), sorted by descending correlation, with columns `variables`,
#'   `corrs` (rounded to 2 decimals) and `corr_strength`.
find_cor <- function(x, y, dat) {
  # Vectorised labelling. The thresholds are strict, so exactly +/-0.5 is
  # "weak". An undefined correlation (e.g. a constant column) stays NA instead
  # of aborting with "missing value where TRUE/FALSE needed".
  strength_corr <- function(z) {
    case_when(
      is.na(z) ~ NA_character_,
      z > 0.5 ~ "Strong positive correlation",
      z < -0.5 ~ "Strong negative correlation",
      TRUE ~ "Weak Correlation"
    )
  }

  # Index by name so `x` and `y` are always read as the caller's strings, never
  # as columns of `dat` that happen to be called "x" or "y". unique() keeps `x`
  # in first position when it also appears in `y`.
  cols <- unique(c(x, y))
  cor_mat <- dat[cols] %>%
    drop_na() %>%
    cor(method = "pearson") %>%
    round(2)

  # The first column of the matrix holds every variable's correlation with `x`.
  tibble(
    variables = rownames(cor_mat),
    corrs = unname(cor_mat[, 1])
  ) %>%
    arrange(desc(corrs)) %>%
    mutate(corr_strength = strength_corr(corrs))
}
Let’s find the correlation between avg_sat_score and each of the four survey scores. Let’s see if there is any correlation between those scores and sat scores.
# Correlate avg_sat_score with the four survey score columns of `master`; the
# returned tibble is printed.
find_cor("avg_sat_score",c("safety_respect_score","communication_score","engagement_score","academic_expectations_score"),master)
It looks like avg_sat_score most closely correlates with safety_respect_score (.28), followed in order by academic_expectations_score (.18), engagement_score (.10) and communication_score (.09).
Let’s repeat this process by seeing if avg_sat_score correlates with other variables.
# Correlate avg_sat_score with the listed percentage columns of `master`; the
# returned tibble is printed.
find_cor("avg_sat_score",c("frl_percent","ell_percent","sped_percent","asian_per","black_per","white_per","hispanic_per",
"male_per","female_per","grads_percent","dropout_percent"),master)
Let’s create a vector containing the names of the first 25 numeric columns, and another containing the variables that we have decided determine “school quality”
# Names of the first 25 numeric columns of `master`.
numeric_columns <- names(master)[vapply(master, is.numeric, logical(1))][1:25]

# Columns treated as measures of school quality.
school_quality_columns <- c(
  "SAT Writing Avg. Score",
  "high_score_percent",
  "dropout_percent",
  "SAT Critical Reading Avg. Score",
  "SAT Math Avg. Score",
  "exams_per_student",
  "avg_sat_score",
  "communication_score",
  "engagement_score",
  "grads_percent",
  "academic_expectations_score",
  "safety_respect_score"
)
Now let’s iterate through our school quality columns and see each correlation dataframe.
# Print the correlation table of every school-quality measure against the
# numeric columns.
for (measure in school_quality_columns) {
  measure %>%
    find_cor(numeric_columns, master) %>%
    print()
}
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 SAT Writing Avg. Score 1 Strong positive correlation
## 2 SAT Critical Reading Avg. Score 0.99 Strong positive correlation
## 3 avg_sat_score 0.99 Strong positive correlation
## 4 SAT Math Avg. Score 0.94 Strong positive correlation
## 5 grads_percent 0.74 Strong positive correlation
## 6 white_per 0.64 Strong positive correlation
## 7 high_score_percent 0.63 Strong positive correlation
## 8 Number of Exams with scores 3 4 or 5 0.62 Strong positive correlation
## 9 AP Test Takers 0.59 Strong positive correlation
## 10 Total Exams Taken 0.580 Strong positive correlation
## # ... with 15 more rows
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 high_score_percent 1 Strong positive correlation
## 2 SAT Math Avg. Score 0.66 Strong positive correlation
## 3 avg_sat_score 0.65 Strong positive correlation
## 4 SAT Writing Avg. Score 0.63 Strong positive correlation
## 5 SAT Critical Reading Avg. Score 0.61 Strong positive correlation
## 6 asian_per 0.51 Strong positive correlation
## 7 Number of Exams with scores 3 4 or 5 0.48 Weak Correlation
## 8 grads_percent 0.42 Weak Correlation
## 9 AP Test Takers 0.41 Weak Correlation
## 10 Total Exams Taken 0.41 Weak Correlation
## # ... with 15 more rows
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 dropout_percent 1 Strong positive correlation
## 2 sped_percent 0.54 Strong positive correlation
## 3 frl_percent 0.48 Weak Correlation
## 4 ell_percent 0.43 Weak Correlation
## 5 hispanic_per 0.41 Weak Correlation
## 6 selfcontained_num 0.37 Weak Correlation
## 7 Total Cohort 0.22 Weak Correlation
## 8 male_per 0.21 Weak Correlation
## 9 total_enrollment 0.07 Weak Correlation
## 10 black_per 0.03 Weak Correlation
## # ... with 15 more rows
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 SAT Critical Reading Avg. Score 1 Strong positive correlation
## 2 SAT Writing Avg. Score 0.99 Strong positive correlation
## 3 avg_sat_score 0.99 Strong positive correlation
## 4 SAT Math Avg. Score 0.94 Strong positive correlation
## 5 grads_percent 0.72 Strong positive correlation
## 6 Number of Exams with scores 3 4 or 5 0.63 Strong positive correlation
## 7 high_score_percent 0.61 Strong positive correlation
## 8 white_per 0.61 Strong positive correlation
## 9 AP Test Takers 0.59 Strong positive correlation
## 10 Total Exams Taken 0.59 Strong positive correlation
## # ... with 15 more rows
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 SAT Math Avg. Score 1 Strong positive correlation
## 2 avg_sat_score 0.98 Strong positive correlation
## 3 SAT Critical Reading Avg. Score 0.94 Strong positive correlation
## 4 SAT Writing Avg. Score 0.94 Strong positive correlation
## 5 asian_per 0.72 Strong positive correlation
## 6 grads_percent 0.68 Strong positive correlation
## 7 Number of Exams with scores 3 4 or 5 0.66 Strong positive correlation
## 8 high_score_percent 0.66 Strong positive correlation
## 9 AP Test Takers 0.65 Strong positive correlation
## 10 Total Exams Taken 0.64 Strong positive correlation
## # ... with 15 more rows
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 exams_per_student 1 Strong positive correlation
## 2 Total Exams Taken 0.44 Weak Correlation
## 3 AP Test Takers 0.4 Weak Correlation
## 4 Number of Exams with scores 3 4 or 5 0.38 Weak Correlation
## 5 SAT Math Avg. Score 0.31 Weak Correlation
## 6 avg_sat_score 0.3 Weak Correlation
## 7 grads_percent 0.3 Weak Correlation
## 8 Num of SAT Test Takers 0.290 Weak Correlation
## 9 SAT Critical Reading Avg. Score 0.28 Weak Correlation
## 10 SAT Writing Avg. Score 0.28 Weak Correlation
## # ... with 15 more rows
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 avg_sat_score 1 Strong positive correlation
## 2 SAT Critical Reading Avg. Score 0.99 Strong positive correlation
## 3 SAT Writing Avg. Score 0.99 Strong positive correlation
## 4 SAT Math Avg. Score 0.98 Strong positive correlation
## 5 grads_percent 0.72 Strong positive correlation
## 6 Number of Exams with scores 3 4 or 5 0.65 Strong positive correlation
## 7 high_score_percent 0.65 Strong positive correlation
## 8 AP Test Takers 0.62 Strong positive correlation
## 9 asian_per 0.62 Strong positive correlation
## 10 white_per 0.62 Strong positive correlation
## # ... with 15 more rows
## # A tibble: 26 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 communication_score 1 Strong positive correlation
## 2 grads_percent 0.36 Weak Correlation
## 3 white_per 0.3 Weak Correlation
## 4 SAT Writing Avg. Score 0.26 Weak Correlation
## 5 avg_sat_score 0.25 Weak Correlation
## 6 SAT Critical Reading Avg. Score 0.24 Weak Correlation
## 7 SAT Math Avg. Score 0.23 Weak Correlation
## 8 high_score_percent 0.18 Weak Correlation
## 9 female_per 0.1 Weak Correlation
## 10 exams_per_student 0.09 Weak Correlation
## # ... with 16 more rows
## # A tibble: 26 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 engagement_score 1 Strong positive correlation
## 2 white_per 0.37 Weak Correlation
## 3 grads_percent 0.36 Weak Correlation
## 4 SAT Writing Avg. Score 0.31 Weak Correlation
## 5 avg_sat_score 0.3 Weak Correlation
## 6 SAT Critical Reading Avg. Score 0.290 Weak Correlation
## 7 SAT Math Avg. Score 0.28 Weak Correlation
## 8 high_score_percent 0.2 Weak Correlation
## 9 avg_class_size 0.15 Weak Correlation
## 10 Num of SAT Test Takers 0.12 Weak Correlation
## # ... with 16 more rows
## # A tibble: 25 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 grads_percent 1 Strong positive correlation
## 2 SAT Writing Avg. Score 0.74 Strong positive correlation
## 3 SAT Critical Reading Avg. Score 0.72 Strong positive correlation
## 4 avg_sat_score 0.72 Strong positive correlation
## 5 SAT Math Avg. Score 0.68 Strong positive correlation
## 6 high_score_percent 0.42 Weak Correlation
## 7 white_per 0.42 Weak Correlation
## 8 female_per 0.33 Weak Correlation
## 9 Number of Exams with scores 3 4 or 5 0.32 Weak Correlation
## 10 AP Test Takers 0.3 Weak Correlation
## # ... with 15 more rows
## # A tibble: 26 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 academic_expectations_score 1 Strong positive correlation
## 2 grads_percent 0.49 Weak Correlation
## 3 SAT Writing Avg. Score 0.4 Weak Correlation
## 4 SAT Critical Reading Avg. Score 0.38 Weak Correlation
## 5 avg_sat_score 0.38 Weak Correlation
## 6 SAT Math Avg. Score 0.36 Weak Correlation
## 7 white_per 0.3 Weak Correlation
## 8 high_score_percent 0.24 Weak Correlation
## 9 exams_per_student 0.18 Weak Correlation
## 10 female_per 0.16 Weak Correlation
## # ... with 16 more rows
## # A tibble: 26 x 3
## variables corrs corr_strength
## <chr> <dbl> <chr>
## 1 safety_respect_score 1 Strong positive correlation
## 2 grads_percent 0.61 Strong positive correlation
## 3 SAT Writing Avg. Score 0.54 Strong positive correlation
## 4 SAT Math Avg. Score 0.53 Strong positive correlation
## 5 avg_sat_score 0.53 Strong positive correlation
## 6 SAT Critical Reading Avg. Score 0.49 Weak Correlation
## 7 high_score_percent 0.4 Weak Correlation
## 8 white_per 0.38 Weak Correlation
## 9 asian_per 0.31 Weak Correlation
## 10 female_per 0.23 Weak Correlation
## # ... with 16 more rows
# Print the column names stored in numeric_columns.
numeric_columns
## [1] "Num of SAT Test Takers"
## [2] "SAT Critical Reading Avg. Score"
## [3] "SAT Math Avg. Score"
## [4] "SAT Writing Avg. Score"
## [5] "avg_sat_score"
## [6] "AP Test Takers"
## [7] "Total Exams Taken"
## [8] "Number of Exams with scores 3 4 or 5"
## [9] "exams_per_student"
## [10] "high_score_percent"
## [11] "avg_class_size"
## [12] "frl_percent"
## [13] "total_enrollment"
## [14] "ell_percent"
## [15] "sped_percent"
## [16] "selfcontained_num"
## [17] "asian_per"
## [18] "black_per"
## [19] "hispanic_per"
## [20] "white_per"
## [21] "male_per"
## [22] "female_per"
## [23] "Total Cohort"
## [24] "grads_percent"
## [25] "dropout_percent"
Let’s create a function that will visualize this.
#' Horizontal bar chart of correlation coefficients.
#'
#' @param corrs Data frame with columns `variables`, `corrs` and
#'   `corr_strength` (the shape returned by `find_cor()`).
#' @param title Plot title.
#' @return A ggplot object: one bar per variable, ordered by coefficient and
#'   coloured by correlation strength, with the coefficient printed beside it.
visualize_corrs <- function(corrs, title) {
  strength_colours <- c(
    "Strong positive correlation" = "blue",
    "Weak Correlation" = "gray",
    "Strong negative correlation" = "red"
  )
  # Bars get relatively wider as the number of variables grows.
  bar_width <- nrow(corrs) / (nrow(corrs) + 5)

  ggplot(
    data = corrs,
    mapping = aes(
      x = reorder(variables, corrs),
      y = corrs,
      fill = corr_strength
    )
  ) +
    geom_col(width = bar_width) +
    # Put each label just beyond the end of its bar: right of positive bars,
    # left of negative ones. A fixed hjust of -0.3 would draw the labels of
    # negative bars on top of the bars themselves.
    geom_text(aes(label = corrs, hjust = ifelse(corrs < 0, 1.3, -0.3))) +
    scale_fill_manual(values = strength_colours) +
    coord_flip() +
    labs(title = title, x = "Variables", y = "Correlations") +
    theme(
      panel.background = element_blank(),
      plot.title = element_text(hjust = -0.3, face = "bold"),
      axis.title.y = element_text(hjust = 0.9, color = "gray"),
      axis.title.x = element_text(hjust = 0, color = "gray"),
      axis.text.x = element_text(color = "gray"),
      legend.position = "none"
    )
}
Now, let’s visualize our first set of data.
# Correlations of avg_sat_score with the four survey score columns, then the
# bar chart of that table.
avg_sat_score_corrs <- find_cor("avg_sat_score",c("safety_respect_score","communication_score",
"engagement_score","academic_expectations_score"),master)
visualize_corrs(corrs= avg_sat_score_corrs, title = "Correlations with Average SAT Score")
From this data, we can clearly see that safety respect score correlates most highly.
Let’s try this with additional data.
# Correlations of avg_sat_score with the listed percentage columns, then the
# bar chart of that table.
avg_sat_score_corrs_2 <- find_cor("avg_sat_score",c("frl_percent","ell_percent",
"sped_percent","asian_per","black_per","white_per",
"hispanic_per","male_per","female_per",
"grads_percent","dropout_percent"),master)
visualize_corrs(corrs = avg_sat_score_corrs_2, title = "Correlations with Average SAT Score")
Now that we have added additional variables, we can see that there are some variables that correlate very highly with average SAT score, and some of those have very strong negative correlations.
Now let’s try this with additional variables.
# Check the storage type of school_quality_columns.
typeof(school_quality_columns)
## [1] "character"
# Draw one correlation bar chart per school-quality measure.
for (measure in school_quality_columns) {
  # Descriptive names are used on purpose: a global called `c` would shadow
  # base::c wherever `c` is passed as a value (e.g. `do.call(c, x)`).
  measure_corrs <- find_cor(measure, numeric_columns, master)
  measure_plot <- visualize_corrs(
    corrs = measure_corrs,
    # `measure` is already a string, so it can be pasted into the title as is.
    title = paste("Correlations with", measure)
  )
  print(measure_plot)
}
From these bar charts, we can see that each of our success metrics has its own set of variables that have a strong positive correlation or a strong negative correlation. Let’s filter out our weak correlations so that we only see strong correlations.
# Draw one bar chart per school-quality measure, showing only the correlations
# that are not labelled weak.
for (measure in school_quality_columns) {
  # Descriptive names are used on purpose: a global called `c` would shadow
  # base::c wherever `c` is passed as a value (e.g. `do.call(c, x)`).
  strong_corrs <- find_cor(measure, numeric_columns, master) %>%
    filter(corr_strength != "Weak Correlation")
  strong_plot <- visualize_corrs(
    corrs = strong_corrs,
    # `measure` is already a string, so it can be pasted into the title as is.
    title = paste("Correlations with", measure)
  )
  print(strong_plot)
}
Now we can more easily see the correlations that are most strongly affecting each of our success metrics.
Now that we’ve seen these correlations, it would be interesting to see whether parents, students and teachers have similar perceptions about the four school quality metrics they were surveyed about: safety and respect, communication, engagement, and academic expectations.
Let’s go back to our dataset and see how these compare between parents, students and teachers.
head(gened)
# Keep the first 32 columns of each survey table, selected by name.
gened_trimmed_2 <- select(gened, names(gened)[1:32])
d75_trimmed_2 <- select(d75, names(d75)[1:32])
# Stack the gened rows on top of the d75 rows, then preview.
survey_2 <- rbind(gened_trimmed_2, d75_trimmed_2)
head(survey_2)
Let’s look at the size of our new data set
# Report how many rows survey_2 has.
print(paste("Number of rows:", nrow(survey_2)))
## [1] "Number of rows: 1702"
# Report how many columns survey_2 has.
print(paste("Number of columns:", ncol(survey_2)))
## [1] "Number of columns: 32"
Let’s see how many rows we are left with once we drop all rows that have any NA values. The effect of this is likely to remove any non-high schools.
# Keep only the rows with no missing value in any column, then report how many
# rows remain.
survey_2 <- drop_na(survey_2)
print(paste("Number of rows:", nrow(survey_2)))
## [1] "Number of rows: 492"
# Report how many columns survey_2 has after dropping incomplete rows.
print(paste("Number of columns:", ncol(survey_2)))
## [1] "Number of columns: 32"
Now, we’re left with a data set that is about one quarter of the original size. Since it’s still a large dataset, let’s keep it.
# Reshape columns 8 to 32 of survey_2 to long form (one row per school and
# survey column), drop the four *_tot_11 columns, and derive two labels from
# each column name:
#   response_type - from the letter after the first "_t", "_s" or "_p"
#   metric        - from the prefix before the first underscore
survey_3 <- survey_2 %>%
  gather(key = "Survey Question", value = score, 8:32) %>%
  filter(
    !`Survey Question` %in% c("aca_tot_11", "com_tot_11", "eng_tot_11", "saf_tot_11")
  ) %>%
  mutate(
    response_type = str_sub(
      `Survey Question`,
      start = regexpr("_[tsp]", `Survey Question`) + 1,
      end = regexpr("_[tsp]", `Survey Question`) + 1
    ),
    # Anything that is neither "p" nor "s" is labelled "teacher".
    response_type = case_when(
      response_type == "p" ~ "parent",
      response_type == "s" ~ "student",
      TRUE ~ "teacher"
    ),
    metric = str_sub(
      `Survey Question`,
      end = regexpr("_", `Survey Question`) - 1
    ),
    # Any prefix not listed here is labelled "safety".
    metric = case_when(
      metric == "aca" ~ "academic expectations",
      metric == "com" ~ "communication",
      metric == "eng" ~ "engagement",
      metric == "N" ~ "number",
      metric == "nr" ~ "number eligible",
      metric == "rr" ~ "response rate",
      TRUE ~ "safety"
    )
  )
survey_3
# Mean response rate per respondent group, rounded to a whole number, with a
# flag on the group(s) holding the highest mean.
survey_3_rr_grouped <- survey_3 %>%
  filter(metric == "response rate") %>%
  group_by(response_type) %>%
  summarise(avg_score = round(mean(score))) %>%
  arrange(desc(avg_score)) %>%
  mutate(
    highest_score = avg_score == max(avg_score),
    measure = "response rate"
  )
survey_3_rr_grouped
# Horizontal bar chart of the mean response rate per group; rows flagged
# highest_score are blue, the rest light gray, and each bar carries its value
# as a percentage label.
bar_chart_survey_3_rr_grouped <- ggplot(
  data = survey_3_rr_grouped,
  mapping = aes(
    x = reorder(response_type, avg_score),
    y = avg_score,
    fill = highest_score
  )
) +
  geom_col() +
  geom_text(aes(label = paste0(avg_score, "%")), hjust = -0.1) +
  scale_fill_manual(values = c("TRUE" = "blue", "FALSE" = "light gray")) +
  coord_flip() +
  labs(
    title = "Average Response Rates by Group",
    x = "Group",
    y = "Average Response Rate"
  ) +
  theme(
    panel.background = element_blank(),
    plot.title = element_text(hjust = -0.1, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, vjust = 2, color = "gray"),
    axis.title.x = element_text(hjust = 0, color = "gray"),
    axis.text.x = element_text(color = "gray"),
    legend.position = "none"
  )
bar_chart_survey_3_rr_grouped
# Rows of survey_3 that hold response rates.
survey_3_rr <- filter(survey_3, metric == "response rate")
survey_3_rr
# Horizontal boxplots of response rate per group, with groups ordered by their
# mean score.
boxplot_survey_3_rr <- ggplot(
  data = survey_3_rr,
  mapping = aes(x = reorder(response_type, score, FUN = mean), y = score)
) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "Average Response Rates by Group",
    x = "Group",
    y = "Response Rate"
  ) +
  theme(
    panel.background = element_blank(),
    plot.title = element_text(hjust = -0.1, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, vjust = 2, color = "gray"),
    axis.title.x = element_text(hjust = 0, color = "gray"),
    axis.text.x = element_text(color = "gray"),
    legend.position = "none"
  )
boxplot_survey_3_rr
#' Horizontal boxplots of `score` per respondent group.
#'
#' @param dat Data frame with columns `response_type`, `score` and (when the
#'   default label is used) `metric`.
#' @param metric Label used in the title and on the score axis. Defaults to the
#'   `metric` column of `dat`.
#' @return A ggplot object with one box per `response_type`, ordered by mean
#'   score.
make_boxplot <- function(dat, metric = dat$metric) {
  # The default hands over the whole `metric` column (one value per row).
  # Collapse it to its distinct value(s) so the title and axis label are a
  # single string rather than one copy per row.
  metric_label <- paste(unique(metric), collapse = ", ")

  ggplot(
    data = dat,
    mapping = aes(x = reorder(response_type, score, FUN = mean), y = score)
  ) +
    geom_boxplot() +
    coord_flip() +
    labs(
      title = paste("Average", metric_label, "by group"),
      x = "group",
      y = metric_label
    ) +
    theme(
      panel.background = element_blank(),
      plot.title = element_text(hjust = -0.1, face = "bold"),
      axis.title.y = element_text(hjust = 0.9, vjust = 2, color = "gray"),
      axis.title.x = element_text(hjust = 0, color = "gray"),
      axis.text.x = element_text(color = "gray"),
      legend.position = "none"
    )
}
# Draw the boxplot for the response-rate rows, using the function's default label.
make_boxplot(survey_3_rr)
# Distinct values of `metric` in survey_3: count() gives one row per value and
# [1] keeps only its first column, so `metrics` is a one-column tibble.
metrics <- count(survey_3,metric)[1]
#' Rows of `dat` belonging to one metric.
#'
#' @param dat Data frame with a `metric` column.
#' @param chosen_metric Value of `metric` to keep.
#' @return `dat` restricted to the rows where `metric` equals `chosen_metric`.
make_boxplot_data <- function(dat, chosen_metric) {
  filter(dat, metric == chosen_metric)
}
# Preview the rows of survey_3 whose metric is "communication".
make_boxplot_data(survey_3, "communication")
# Flatten the one-column tibble `metrics` into a named character vector and print it.
metrics_2 <- unlist(metrics)
metrics_2
## metric1 metric2 metric3
## "academic expectations" "communication" "engagement"
## metric4 metric5 metric6
## "number" "number eligible" "response rate"
## metric7
## "safety"
# Boxplot of the "communication" rows of survey_3.
make_boxplot(make_boxplot_data(survey_3,"communication"))
# Draw one boxplot per metric value found in survey_3.
for (m in unlist(metrics)) {
  boxplot_data <- make_boxplot_data(survey_3, m)
  # Not named `boxplot`: a global with that name would shadow
  # graphics::boxplot wherever it is passed as a value.
  metric_boxplot <- make_boxplot(boxplot_data)
  print(metric_boxplot)
}
#' Horizontal bar chart of the average score per respondent group.
#'
#' @param dat Data frame with columns `response_type`, `avg_score`,
#'   `highest_score` and (when the default label is used) `measure`.
#' @param measure Label used in the title and on the score axis. Defaults to
#'   the `measure` column of `dat`.
#' @return A ggplot object with one bar per `response_type`; rows flagged
#'   `highest_score` are blue, the rest light gray.
make_bar_chart <- function(dat, measure = dat$measure) {
  # The default hands over the whole `measure` column (one value per row).
  # Collapse it to its distinct value(s) so the title and axis label are a
  # single string rather than one copy per row.
  measure_label <- paste(unique(measure), collapse = ", ")

  ggplot(
    data = dat,
    mapping = aes(
      x = reorder(response_type, avg_score),
      y = avg_score,
      fill = highest_score
    )
  ) +
    geom_col() +
    # hjust is a fixed offset, not a data mapping, so it is set outside aes().
    geom_text(aes(label = avg_score), hjust = -0.1) +
    scale_fill_manual(values = c("TRUE" = "blue", "FALSE" = "light gray")) +
    coord_flip() +
    labs(
      title = paste("Average", measure_label, "by group"),
      x = "group",
      y = measure_label
    ) +
    theme(
      panel.background = element_blank(),
      plot.title = element_text(hjust = -0.1, face = "bold"),
      axis.title.y = element_text(hjust = 0.9, vjust = 2, color = "gray"),
      axis.title.x = element_text(hjust = 0, color = "gray"),
      axis.text.x = element_text(color = "gray"),
      legend.position = "none"
    )
}
# Bar chart of the grouped response-rate table, using the function's default label.
make_bar_chart(survey_3_rr_grouped)
#' Average score per respondent group for one metric.
#'
#' @param dat Data frame with columns `metric`, `response_type` and `score`.
#' @param chosen_measure Value of `metric` to summarise.
#' @return A tibble with one row per `response_type`, sorted by descending
#'   `avg_score` (mean score rounded to 1 decimal), plus `highest_score`
#'   (TRUE for the top average) and `measure` (the chosen metric).
make_bar_chart_data <- function(dat, chosen_measure) {
  group_means <- dat %>%
    filter(metric == chosen_measure) %>%
    group_by(response_type) %>%
    summarise(avg_score = round(mean(score), 1))

  group_means %>%
    arrange(desc(avg_score)) %>%
    mutate(
      highest_score = avg_score == max(avg_score),
      measure = chosen_measure
    )
}
# Bar chart of the per-group averages for the "communication" metric.
make_bar_chart(make_bar_chart_data(survey_3,"communication"))
# Draw one bar chart per metric value found in survey_3.
for (m in unlist(metrics)) {
  bar_chart_data <- survey_3 %>% make_bar_chart_data(m)
  bar_chart <- bar_chart_data %>% make_bar_chart()
  print(bar_chart)
}
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.